{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "22f037ad-d2ec-485a-9798-9ac8bf350480",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "# Anscombe's Quartet -- Core Computations\n",
    "\n",
    "The raw data has four series.\n",
    "\n",
    "We'll define a some classes and ingest the data.\n",
    "\n",
    "Globals:\n",
    "\n",
    "-  `quartet` has the quartet data that can be used for presentations."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bd8a963a-565d-477d-ad53-e8f79d338b72",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a79d2991-7893-4cb2-b001-b55888c68efc",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import json\n",
    "from pathlib import Path\n",
    "import statistics\n",
    "\n",
    "from pydantic import BaseModel"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "601a2daf-fa0d-4587-8605-a0e7a0258eb2",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "99a55c73-67ad-4ce0-9faf-7921a179e5f2",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "class Pair(BaseModel):\n",
    "    x: float\n",
    "    y: float\n",
    "\n",
    "class Series(BaseModel):\n",
    "    series: str\n",
    "    data: list[Pair]\n",
    "\n",
    "    @property\n",
    "    def x(self) -> list[float]:\n",
    "        return [p.x for p in self.data]\n",
    "        \n",
    "    @property\n",
    "    def y(self) -> list[float]:\n",
    "        return [p.y for p in self.data]\n",
    "\n",
    "    @property\n",
    "    def correlation(self) -> float:\n",
    "        return statistics.correlation(self.x, self.y)\n",
    "\n",
    "    @property\n",
    "    def regression(self) -> tuple[float, float]:\n",
    "        return statistics.linear_regression(self.x, self.y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "630f062b-10f8-44a7-b01f-5266143377c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from math import isclose\n",
    "test = Series(\n",
    "    series=\"test\", \n",
    "    data=[Pair(x=2, y=4), Pair(x=3, y=6), Pair(x=5, y=10)]\n",
    ")\n",
    "assert isclose(test.correlation, 1.0)\n",
    "assert isclose(test.regression.slope, 2.0)\n",
    "assert isclose(test.regression.intercept, 0.0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "53d95769-cdbf-4723-8d87-94c6a7ed2c3e",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Data Loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f5030f1-f7bb-4254-a829-86cf198eed9c",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "source = Path.cwd().parent.parent / \"data\" / \"anscombe.json\"\n",
    "with source.open() as source_file:\n",
    "    json_document = json.load(source_file)\n",
    "    source_data = (Series.model_validate(s) for s in json_document)\n",
    "    quartet = {s.series: s for s in source_data}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df4a3189-55df-42f0-ad86-0b61e41cedba",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "quartet['I']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13108efb-7c10-4250-8c24-4bff9d53f66c",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "quartet['IV']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c381c215-048e-4534-88a2-0c8818efd0ae",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Statistical Computations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b3ae023-bcf4-43cb-9444-692174a58e76",
   "metadata": {},
   "outputs": [],
   "source": [
    "quartet['I'].correlation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e67f9021-f8c0-4ff9-a42a-cc11f251131d",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "r = quartet['I'].regression\n",
    "f\"y = {r.slope:.1f} * x + {r.intercept:.1f}\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
